import jieba

from .user_tools import tmp, txt_import, words_set

__all__ = ['text_flesh']


@tmp
def text_flesh(path):
    """
    Tokenize the text at *path* and extract spatial and temporal information.

    Results are cached by the ``tmp`` decorator; the cache file path is
    "./tmp/text_flesh.json".

    :param path: path to the text file
    :return: 3-tuple — first element is the per-row token lists (stop words
        removed), second is the spatial info (longitude/latitude pairs as
        floats), third is the raw timestamp strings
    """
    # Import the text rows; each row is indexable: [0]=text, [1:3]=coords, [3]=time.
    data = txt_import(path)

    # Load the emotion-category dictionaries so jieba keeps emotion terms
    # intact during segmentation (one file per emotion class).
    for name in ("anger", "disgust", "fear", "joy", "sadness"):
        jieba.load_userdict("./data/" + name + ".txt")

    # Stop-word set used to filter the segmentation output.
    stopset = words_set("./data/baidu_stopwords.txt")

    emotion_words, space_data, time_data = [], [], []
    for row in data:
        # Drop the trailing URL (everything from "http" onward), segment the
        # remainder, and filter out stop words. jieba.cut yields lazily, so
        # no intermediate list is materialized.
        tokens = jieba.cut(row[0].split("http")[0])
        emotion_words.append([w for w in tokens if w not in stopset])

        # GCJ-02 coordinates (longitude/latitude) converted to floats.
        space_data.append([float(v) for v in row[1:3]])

        # Raw timestamp string, e.g. "Tue Jul 08 16:31:26 +0800 2014".
        time_data.append(row[3])

    return emotion_words, space_data, time_data
